// Start from an empty session.
clear all
// Never pause output; "permanently" keeps the setting for future sessions too.
set more off, permanently
// Raise the variable limit before any data are loaded.
set maxvar 20000

// Load the analysis data set, replacing whatever is in memory.
use final_data, clear

*** MAKING/CLEANING VARIABLES
{

// Constant 1 for every observation (used further down as a trivial
// second grouping variable in -mean, over()-).
generate one = 1

// 1 when a strictly positive expected student debt is reported.
// A missing student_debt is coded 0, not missing.
generate expect_any_debt = (!missing(student_debt) & student_debt > 0)
generate debt = expect_any_debt

// Invert rank_1 ... rank_10: major_rank_r is the index k of the major whose
// rank_k equals r (missing when no major carries rank r).
forvalues r = 1/10 {
	generate major_rank_`r' = .
	forvalues k = 1/10 {
		replace major_rank_`r' = `k' if rank_`k' == `r'
	}
}

// Renumber the major-group codes into alphabetical order; the result is
// stored in a new variable with the suffix _abc.
foreach mg of varlist major_group_? {
	recode `mg' (1=1) (8=2) (3=3) (9=4) (10=5) (7=6) (4=7) (2=8) (5=9) (6=10) (0=11) (11=12), generate(`mg'_abc)
}

// Actual salary data.  One row per major code 1-10; column 1 is read into the
// actual_ba_* variables and column 2 into the actual_grad_* variables.
// Row names in the trailing comments follow the maj_label value label.
matrix define S = (				///
		 68774, 131495 \		/// 1  Bio/Chem
		117259, 124615 \		/// 2  Pharmacy
		 87390, 108998 \		/// 3  Econ/Business
		 82963,  90898 \		/// 4  Math/Computers
		 81484, 104079 \		/// 5  Politics
		 59078,  74746 \		/// 6  Social Science
		 63586,  80948 \		/// 7  Humanities
		 65199,  77622 \		/// 8  Comm/Journalism
		 50902,  67466 \		/// 9  Education
		 96676, 115379 )

// M: every unordered pair (a, b) with 1 <= a < b <= 10, one pair per row, in
//    the order (1,2), (1,3), ..., (1,10), (2,3), ..., (9,10)  -> 45 rows.
// R: the same construction for 1 <= a < b <= 5                 -> 10 rows.
// The pairs are generated by loop instead of being typed out, so no pair can
// be mistyped or left out and the row count always equals comb(n, 2).
// J() gives the same default row/column names as a literal matrix would.
foreach spec in "M 10" "R 5" {
	local mname : word 1 of `spec'
	local top   : word 2 of `spec'

	matrix define `mname' = J(comb(`top', 2), 2, .)

	local row = 0
	local last_a = `top' - 1
	forvalues a = 1/`last_a' {
		local first_b = `a' + 1
		forvalues b = `first_b'/`top' {
			local ++row
			matrix `mname'[`row', 1] = `a'
			matrix `mname'[`row', 2] = `b'
		}
	}
}



// Indicator for parent_edu >= 5 (labelled "Parent has degree?" further down).
// Stata treats a missing value as larger than any number, so a bare
// "parent_edu >= 5" would code respondents with missing parent_edu as 1.
// The if-qualifier leaves those respondents missing instead.
gen parent_college = (parent_edu >= 5) if !missing(parent_edu)

// Replace the string variables race and sex with labelled numeric versions
// under the same names (encode numbers the distinct strings in sorted order).
foreach var of varlist race sex {
	encode `var', generate(new_`var')
	drop `var'
	rename new_`var' `var'
}

// Swap the value label produced by encode for a hand-written one.
// NOTE(review): this assumes encode produced exactly these seven categories
// in this order - confirm with -tab race- on the raw strings.
label define fix_noncitizen_label 1 "Asian" 2 "Black" 3 "Hispanic" 4 "Non-citizen" 5 "None Given/Race Unknown" 6 "Two or More Races" 7 "White"
label values race fix_noncitizen_label


// For each of the five top ranks r, look up for the major holding that rank:
//   actual_ba_rank_r / actual_grad_rank_r : actual salary from matrix S
//   guess_ba_rank_r  / guess_grad_rank_r  : the respondent's own guess
// All four stay missing when no major carries rank r.
forvalues r = 1/5 {
	generate actual_ba_rank_`r' = .
	generate actual_grad_rank_`r' = .
	generate guess_ba_rank_`r' = .
	generate guess_grad_rank_`r' = .
	forvalues k = 1/10 {
		local is_r rank_`k' == `r'
		replace actual_ba_rank_`r' = S[`k', 1] if `is_r'
		replace actual_grad_rank_`r' = S[`k', 2] if `is_r'
		replace guess_ba_rank_`r' = guess_ba_`k' if `is_r'
		replace guess_grad_rank_`r' = guess_grad_`k' if `is_r'
	}
}

// Signed error by rank: guess minus actual.
forvalues r = 1/5 {
	generate error_ba_rank_`r' = guess_ba_rank_`r' - actual_ba_rank_`r'
	generate error_grad_rank_`r' = guess_grad_rank_`r' - actual_grad_rank_`r'
}

// Demographic indicators.
// male is 1 when the encoded sex equals 2, the code labelled "Male" below.
generate male = (sex == 2)

// One 0/1 dummy per encoded race code 1-7, labelled in the same order.
local k = 0
foreach nm in "Asian" "Black" "Hispanic" "Foreign" "No Race Given" "Two or more races" "White" {
	local ++k
	generate race_`k' = (race == `k')
	label variable race_`k' "`nm'"
}

// Variable labels used in tables.
label variable cum_gpa_0 "GPA Fall 2014"
label variable student_debt "Expected Student Debt"
label variable male "Male"
label variable parent_college "Parent has degree?"

// Value labels for sex and for the raw major-group codes.
label define sex_label 1 "Female" 2 "Male"
label values sex sex_label

label define maj_label 0 "Uncategorized" 1 "Bio/Chem" 2 "Pharmacy" 3 "Econ/Business" 4 "Math/Computers" 5 "Politics" 6 "Social Science" 7 "Humanities" 8 "Comm/Journalism" 9 "Education" 10 "Engineering" 11 "Undecided"
label values major_group_? maj_label

// Per-major actual salary, signed error (guess minus actual) and absolute
// error, for the BA and the graduate-degree guess.
forvalues k = 1/10 {
	generate actual_ba_`k' = S[`k', 1]
	generate actual_grad_`k' = S[`k', 2]
	foreach lev in ba grad {
		generate error_`lev'_`k' = guess_`lev'_`k' - actual_`lev'_`k'
		generate abs_error_`lev'_`k' = abs(error_`lev'_`k')
	}
}

// Guessed and actual salary difference for every pair of majors listed in
// matrix M (first major minus second major).
local npairs = rowsof(M)
forvalues p = 1/`npairs' {
	local a = M[`p', 1]
	local b = M[`p', 2]

	generate guess_ba_diff_`a'_`b' = guess_ba_`a' - guess_ba_`b'
	generate guess_grad_diff_`a'_`b' = guess_grad_`a' - guess_grad_`b'
	generate actual_ba_diff_`a'_`b' = S[`a', 1] - S[`b', 1]
	generate actual_grad_diff_`a'_`b' = S[`a', 2] - S[`b', 2]
}

// BA salary gap between the rank-1 and rank-2 major: guessed, actual, and
// the error in the guessed gap (signed and absolute).
generate guess_diff_rank_1_2 = guess_ba_rank_1 - guess_ba_rank_2
generate actual_diff_rank_1_2 = actual_ba_rank_1 - actual_ba_rank_2

generate error_diff_rank_1_2 = guess_diff_rank_1_2 - actual_diff_rank_1_2
generate abs_error_diff_rank_1_2 = abs(error_diff_rank_1_2)

// The same error expressed as a share of the rank-1 major's actual BA salary.
generate error_diff_perc_of_rank_1 = error_diff_rank_1_2/actual_ba_rank_1
generate abs_error_diff_perc_of_rank_1 = abs(error_diff_perc_of_rank_1)

summarize abs_error_diff_rank_1_2 abs_error_diff_perc_of_rank_1, detail


// Class counts per major group m (0-11):
//   num_maj<m>_classes          total over semesters 0-7
//   num_maj<m>_classes_as_of_s  running total through semester s
// Plain addition is used, so a missing semester count makes the totals
// missing from that point on.
forvalues m = 0/11 {
	generate num_maj`m'_classes = 0
	forvalues s = 0/7 {
		replace num_maj`m'_classes = num_maj`m'_classes + num_maj`m'_classes_`s'
	}

	generate num_maj`m'_classes_as_of_0 = num_maj`m'_classes_0
	forvalues s = 1/7 {
		local prev = `s' - 1
		generate num_maj`m'_classes_as_of_`s' = num_maj`m'_classes_as_of_`prev' + num_maj`m'_classes_`s'
	}
}

// The same counts re-indexed by the respondent's ranking: for rank r, copy
// the counts of whichever major group equals major_rank_r.  Values stay 0
// when major_rank_r matches none of the codes 0-11 (including missing).
forvalues r = 1/10 {
	generate num_rank_`r'_classes = 0
	forvalues s = 0/7 {
		generate num_rank_`r'_classes_`s' = 0
		generate num_rank_`r'_classes_as_of_`s' = 0
	}

	forvalues m = 0/11 {
		local hit major_rank_`r' == `m'
		replace num_rank_`r'_classes = num_maj`m'_classes if `hit'
		forvalues s = 0/7 {
			replace num_rank_`r'_classes_`s' = num_maj`m'_classes_`s' if `hit'
			replace num_rank_`r'_classes_as_of_`s' = num_maj`m'_classes_as_of_`s' if `hit'
		}
	}
}
	
// infield_R<r>_<s>: 100 when the major group in semester s equals the major
// the respondent ranked r-th, otherwise 0 (0/100 so means read as percents).
forvalues s = 0/7 {
	forvalues r = 1/10 {
		generate infield_R`r'_`s' = 100*(major_group_`s' == major_rank_`r')
	}
}

// Per-semester aggregates over ranks, plus the dropped-out and
// uncategorized (major group 0) indicators.
forvalues s = 0/7 {
	local fields_top
	local fields_bottom
	local classes_top
	forvalues r = 1/5 {
		local fields_top `fields_top' infield_R`r'_`s'
		local classes_top `classes_top' num_rank_`r'_classes_`s'
	}
	forvalues r = 6/10 {
		local fields_bottom `fields_bottom' infield_R`r'_`s'
	}

	// A missing major group in semester s is treated as having dropped out.
	generate dropped_out_`s' = 100*missing(major_group_`s')
	egen infield_R1to5_`s' = rowtotal(`fields_top')
	egen infield_R6to10_`s' = rowtotal(`fields_bottom')
	generate infield_R0_`s' = 100*(major_group_`s' == 0)
	egen classes_R1to5_`s' = rowtotal(`classes_top')
	egen classes_R1_`s' = rowtotal(num_rank_1_classes_`s')
}

// STEM indicator (0/100) for the rank-1 major and for the major group held
// in each semester.  STEM codes: 1, 2, 4 and 10.
generate STEM = 100*inlist(major_rank_1, 1, 2, 4, 10)

forvalues s = 0/7 {
	generate STEM_`s' = 100*inlist(major_group_`s', 1, 2, 4, 10)
}

// Per semester: actual salary of the current major group plus 0/100 flags.
// The lucrative sets are the five majors with the highest salary in the
// corresponding column of S.
// NOTE(review): the basis for the "difficult" set is not visible here.
forvalues s = 0/7 {
	foreach stub in salary_ba salary_grad lucrative_ba lucrative_grad difficult {
		capture drop `stub'_`s'
	}

	generate salary_ba_`s' = .
	generate salary_grad_`s' = .
	generate lucrative_ba_`s' = 100*inlist(major_group_`s', 2, 10, 3, 4, 5)
	generate lucrative_grad_`s' = 100*inlist(major_group_`s', 2, 10, 3, 1, 5)
	generate difficult_`s' = 100*inlist(major_group_`s',10,8,2,4,5)

	// Salary stays missing for major groups outside 1-10.
	forvalues m = 1/10 {
		replace salary_ba_`s' = actual_ba_`m' if major_group_`s' == `m'
		replace salary_grad_`s' = actual_grad_`m' if major_group_`s' == `m'
	}
}


// Error measures for the rank-1 major, BA and graduate degree:
//   error_R1_*     : guess minus actual
//   log_error_R1_* : log guess minus log actual
foreach lev in ba grad {
	generate error_R1_`lev' = error_`lev'_rank_1
	generate log_error_R1_`lev' = ln(guess_`lev'_rank_1) - ln(actual_`lev'_rank_1)
}

// Error in the perceived rank-1 versus rank-2 gap ("difference in
// differences"): guessed gap minus actual gap, in logs ...
foreach lev in ba grad {
	generate log_DinD_`lev' = (ln(guess_`lev'_rank_1) - ln(guess_`lev'_rank_2))	///
				-	(ln(actual_`lev'_rank_1) - ln(actual_`lev'_rank_2))
}

// ... and in levels, divided by 1000.
foreach lev in ba grad {
	generate DinD_`lev' = ((guess_`lev'_rank_1 - guess_`lev'_rank_2) - (actual_`lev'_rank_1 - actual_`lev'_rank_2))/1000
}

}

*** TABLE 1
{
// Descriptive statistics shown in the log.
summarize expect_any_debt
summarize male race_* cum_gpa_0  parent_college
summarize student_debt if expect_any_debt == 1

// Binomial test of the sample share of men against .509, the figure used
// below for the whole student body.
bitest male == .509

// Collapse race codes 5 and 6 into one "other" category, then create one
// dummy per remaining category (race_cat1, race_cat2, ...).
generate race_cat = race
replace race_cat = 5 if inlist(race, 5, 6)
tabulate race_cat, generate(race_cat)

generate student_debt_cond = student_debt if expect_any_debt == 1

// Row order of the table: percentage rows first, then mean (s.d.) rows.
local pct_rows male race_cat1 race_cat2 race_cat3 race_cat5 race_cat4 parent_college expect_any_debt
local lvl_rows student_debt_cond cum_gpa_0

// Survey-sample column: percentages with one decimal ...
foreach v of local pct_rows {
	quietly summarize `v'
	local `v'_survey: display %3.1f `=100*`r(mean)''
}
// ... expected debt in whole dollars, GPA with two decimals.
quietly summarize student_debt_cond
local student_debt_cond_survey: display %5.0f `r(mean)'
local student_debt_cond_survey_sd: display %5.0f `r(sd)'
quietly summarize cum_gpa_0
local cum_gpa_0_survey: display %3.2f `r(mean)'
local cum_gpa_0_survey_sd: display %3.2f `r(sd)'

// Whole-student-body column: shares are hard-coded here.
local male_nonsurvey: display %3.1f `=100*0.509'
local race_cat1_nonsurvey: display %3.1f `=100*0.057'
local race_cat2_nonsurvey: display %3.1f `=100*0.053'
local race_cat3_nonsurvey: display %3.1f `=100*0.035'
local race_cat4_nonsurvey: display %3.1f `=100*0.106'
local race_cat5_nonsurvey: display %3.1f `=100*0.028'
foreach v in parent_college expect_any_debt student_debt_cond cum_gpa_0 {
	local `v'_nonsurvey "Not reported"
}

// Row labels.
local male_lab "Percent male"
local race_cat1_lab "Percent Asian"
local race_cat2_lab "Percent black"
local race_cat3_lab "Percent Hispanic"
local race_cat5_lab "Percent other race/ethnicity"
local race_cat4_lab "Percent non-citizen"
local parent_college_lab "Percent whose parents have degree"
local expect_any_debt_lab "Percent expecting to graduate with student debt"
local student_debt_cond_lab "Expected student debt conditional on expecting any"
local cum_gpa_0_lab "GPA (1st semester)"

// Write the table as comma-separated text.
file open table using "table1.csv", write replace
file write table ", Survey Sample, Whole Student Body" _n
foreach v of local pct_rows {
	file write table "``v'_lab', ``v'_survey', ``v'_nonsurvey'" _n
}
file write table ", Mean (Std. Dev.),"_n
foreach v of local lvl_rows {
	file write table "``v'_lab', ``v'_survey' (``v'_survey_sd'), ``v'_nonsurvey'" _n
}
file close table
}

*** FIGURES 1 and A2, TABLES A1 and A2
{
preserve

// Keep only the raw salary guesses: one BA and one graduate-degree guess
// for each of the majors 1-10.
local guesses
foreach lev in ba grad {
	forvalues k = 1/10 {
		local guesses `guesses' guess_`lev'_`k'
	}
}
keep `guesses'

// Guess and error info by major group.
// Actual salaries come from matrix S (column 1 BA, column 2 graduate).
forvalues k = 1/10 {
	gen actual_ba_`k' = S[`k', 1]
	gen actual_grad_`k' = S[`k', 2]
}
foreach lev in ba grad {
	forvalues k = 1/10 {
		gen error_`lev'_`k' = guess_`lev'_`k' - actual_`lev'_`k'
		gen abs_error_`lev'_`k' = abs(error_`lev'_`k')
	}
}

// Duplicate every variable so collapse can return both the mean and the
// standard deviation of each one.
foreach var of varlist * {
	gen sd_`var' = `var'
	rename `var' mean_`var'
}

collapse 	(mean) mean_* (sd) sd_*
gen x = _n

// Reshape the single wide row into one row per major.  Statistics that are
// not listed as stubs stay wide and are not used afterwards.
local stubs mean_guess_ba_ mean_guess_grad_ mean_actual_ba_ mean_actual_grad_	///
			mean_abs_error_ba_ mean_abs_error_grad_ sd_guess_ba_ sd_guess_grad_
reshape long `stubs' , i(x) j(major)
rename *_ *

foreach var of varlist * {
	format `var'  %9.0f
}

// Alphabetical position of each major, using the same code mapping as the
// major_group recode near the top of this file (0 -> 11, 11 -> 12).
recode major (1=1) (8=2) (3=3) (9=4) (10=5) (7=6) (4=7) (2=8) (5=9) (6=10) (0=11) (11=12), gen(major_abc)

// Labels follow that mapping: raw code 0 is "Uncategorized" and becomes 11,
// raw code 11 is "Undecided" and becomes 12.  The two were previously
// swapped.  Only codes 1-10 occur in this collapsed data set.
label define majors_abc 											///
			1 "Bio/Chem" 2 "Comm/Journ."  3 "Econ/Business"			///
			4 "Education" 5 "Engineering"  6 "Humanities" 			///
			7 "Math/Computers"  8 "Pharmacy" 9 "Politics" 10 "Social Sciences"	///
			11 "Uncategorized" 12 "Undecided"

label values major_abc majors_abc
label variable mean_guess_ba "Mean Belief"
label variable mean_actual_ba "Actual Average"
label variable mean_guess_grad "Mean Belief"
label variable mean_actual_grad "Actual Average"
label variable mean_abs_error_grad "Mean Abs. Error"
label variable mean_abs_error_ba "Mean Abs. Error"

sort major_abc

// Tables A1 (BA) and A2 (graduate degree): mean and s.d. of the guesses,
// actual salary and mean absolute error, one row per major.
// listtex writes rows in its "tabular" row style although the files carry a
// .csv extension; an earlier, commented-out variant of these calls wrote the
// same rows to tableA1.tex / tableA2.tex.
foreach spec in "ba tableA1" "grad tableA2" {
	local lev : word 1 of `spec'
	local out : word 2 of `spec'
	listtex major_abc mean_guess_`lev' sd_guess_`lev' mean_actual_`lev'  	///
			mean_abs_error_`lev'  							///
			using "`out'.csv", type rstyle(tabular)				///
			head("")							///
			foot("") replace
}

// Express the plotted statistics in thousands of dollars.
foreach lev in ba grad {
	foreach stat in actual guess abs_error {
		replace mean_`stat'_`lev' = mean_`stat'_`lev'/1000
	}
}

// Figure 1 (BA).  The dollar sign in the axis title is escaped: an unescaped
// "$1" inside a string is read by Stata as a reference to a global macro
// named 1, which would strip "$1" from the title.
graph bar (asis) mean_actual_ba mean_guess_ba mean_abs_error_ba, 			///
		over(major_abc, label(angle(45)))									///
		bar(1, fcolor(gs6*.9) lcolor(gs6*.9))								///
		bar(2, fcolor(gs6*.6) lcolor(gs6*.6))								///
		bar(3, fcolor(gs6*.3) lcolor(gs6*.3))								///
		ytitle("\$1,000") ylabel(0(20)120) yscale(range(0 120)) 			///
		legend(rows(1) ring(1) position(6) symxsize(*1) size() bmargin(0 5 0 0)) ///
		graphregion(color(white) lwidth(vvvthick)) xsize(15) ysize(8) scale(*1.2)

graph export "figure1.pdf", as(pdf) replace

// Figure A2 (graduate degree): same layout, taller y range, boxless legend.
graph bar (asis) mean_actual_grad mean_guess_grad mean_abs_error_grad, 		///
		over(major_abc, label(angle(45)))									///
		bar(1, fcolor(gs6*.9) lcolor(gs6*.9))								///
		bar(2, fcolor(gs6*.6) lcolor(gs6*.6))								///
		bar(3, fcolor(gs6*.3) lcolor(gs6*.3))								///
		ytitle("\$1,000") ylabel(0(20)120) yscale(range(0 130)) 			///
		legend(rows(1) nobox ring(1) position(6) symxsize(*.75) size() bmargin(0 5 0 0))	///
		graphregion(color(white) lwidth(vvvthick)) xsize(15) ysize(8) scale(*1.2)

graph export "figureA2.pdf", as(pdf) replace

restore
}


*** FIGURE A1
{
preserve

// Stack survey respondents (nonrespondent == 0) on top of the observations
// appended from nonrespondent_major_groups.
// NOTE(review): the appended file is assumed to carry nonrespondent == 1 and
// the same major_group_?_abc variables - confirm against that file.
keep major_group_?_abc
gen nonrespondent = 0

append using "nonrespondent_major_groups"

label define nonresp_lab 0 "Survey Respondents" 1 "Rest of Respondents' Class"
label values nonrespondent nonresp_lab

// Labels for the alphabetical codes.  The recode that created the _abc
// variables maps raw code 0 ("Uncategorized") to 11 and raw code 11
// ("Undecided") to 12, so 11 must read "Uncategorized" and 12 "Undecided".
// The two were previously swapped, which mislabelled the last two bars.
label define majors_abc 											///
			1 "Bio/Chem" 2 "Comm/Journ."  3 "Econ/Business"			///
			4 "Education" 5 "Engineering"  6 "Humanities" 			///
			7 "Math/Computers"  8 "Pharmacy" 9 "Politics" 10 "Social Sciences"	///
			11 "Uncategorized" 12 "Undecided"

forvalues s = 0/7 {
	label values major_group_`s'_abc majors_abc
}

// Left panel: respondents.  Both xtitle() options are kept exactly as in the
// original call.
histogram 	major_group_0_abc if nonrespondent == 0, discrete percent lwidth(thin)					///
			xtitle(Field of Major (Fall 2014)) xlabel(1(1)12, valuelabel noticks angle(45))	///
			graphregion(color(white)) xtitle("") title("Survey Respondents") fcolor(gs6*.6) ///
			lcolor(gs6*.9) saving(resp_major_hist, replace) fxsize(100)	///
			scale(*1.2)

// Right panel: rest of the class, drawn without a y axis.
histogram 	major_group_0_abc if nonrespondent == 1, discrete percent lwidth(thin)					///
			xtitle(Field of Major (Fall 2014)) xlabel(1(1)12, valuelabel noticks angle(45))	///
			graphregion(color(white)) xtitle("") title("Rest of Respondents' Class") fcolor(gs6*.6) ///
			lcolor(gs6*.9) saving(nonresp_major_hist, replace)		///
			ylabel(, nolab tlength(0) tlcolor(white)) ytitle("") yscale( noline) fxsize(90)	///
			scale(*1.2)

graph combine 	"resp_major_hist"									///
				"nonresp_major_hist", graphregion(color(white))		///
				xsize(7) ysize(4)

graph export "figureA1.pdf", as(pdf) replace

// Remove the intermediate graph files.
rm "resp_major_hist.gph"
rm "nonresp_major_hist.gph"

// Chi-squared test of equal first-semester major distributions.
tab major_group_0_abc nonrespondent, chi2

restore
}


*** FIGURE 2
{
preserve

// Outcomes plotted, all measured in semester 7.
local outcomes infield_R1to5_7 infield_R6to10_7 infield_R0_7 dropped_out_7

// Group means by info (0/1) and the p-value of the test that they are equal.
// The variable one is constant, so over(info one) yields two subpopulations.
foreach y of local outcomes {
	quietly mean `y' , over(info one)
	local `y'_1: display %3.1f _b[_subpop_2]
	local `y'_0: display %3.1f _b[_subpop_1]
	quietly test _subpop_1 = _subpop_2
	local `y'_p: display %4.3f `r(p)'
}

// Mean, s.d. and count of each outcome by info.
local stubs
foreach y of local outcomes {
	local stubs `stubs' (mean) `y'_mean=`y' (sd) `y'_sd = `y' (count) `y'_N = `y'
}
collapse `stubs'  , by(info)

// Rename <outcome>_<stat> to <stat>_<outcome> so the outcome name can serve
// as the string reshape index.
rename * value_*
rename value_info info

rename (value_*_sd) (sd_*)
rename (value_*_N) (N_*)
rename (value_*_mean) (mean_*)

reshape long mean_ sd_ N_, i(info) j(var) str
rename *_ *

// Standard error of the mean and the interval mean +/- 1.645 s.e.
gen se = sd/(N^(1/2))
drop N sd

// Horizontal position: outcomes in groups of three slots, the info == 1 bar
// first and the info == 0 bar directly to its right.
gen order = .
local pos = 0
foreach y of local outcomes {
	local ++pos
	replace order = `pos' if var == "`y'"
}
gen spot = 3*(order) + (1-info) - 2

gen ci_l = mean - 1.645*se
gen ci_u = mean + 1.645*se

twoway 		(bar mean spot if info == 1, color(gs6*.9))					///
			(bar mean spot if info == 0, color(gs6*.3))					///
			(rcap ci_l ci_u spot if info == 1, color(black))			///
			(rcap ci_l ci_u spot if info == 0, color(black))			///
			, graphregion(color(white) lwidth(vvvthick)) 				///
			legend(order(1 "Treatment" 2 "Control"))					///
			xlabel(	1.5 `""Fields Ranked" "1 to 5""'	 				///
					4.5 `""Fields Ranked" "6 to 10""'					///
					7.5 `""Uncategorized" "Fields""'					///
					10.5 `"Dropped out"', noticks)						///
			xscale(range(0 12)) xtitle("") yscale(range(0 75))			///
			text(74 1.5 "p = `infield_R1to5_7_p'")						///
			text(74 4.5 "p = `infield_R6to10_7_p'")						///
			text(74 7.5 "p = `infield_R0_7_p'")							///
			text(74 10.5 "p = `dropped_out_7_p'")						///
			ytitle(Percent) title() xsize(15) ysize(7)					///
			note("") scale(*1.4)
graph export "figure2.pdf", replace
restore
}

*** TABLE 2
{
estimates clear
preserve
label var info "Info"
label var infield_R1_0 "Began in Rank 1 Field"
label var infield_R1to5_0 "Began in Rank 1-5 Field"

// Panel A: in a rank 1-5 field; panel B: in the rank-1 field.
// One regression per semester 1-7 of the semester-s indicator on treatment
// (info), the semester-0 indicator and controls, with robust s.e.
// order() now names the panel's own baseline variable; it used to be
// hard-coded to infield_R1_0, which is not in the 1to5 models.
foreach R in 1 1to5 {
	if "`R'" == "1" local tablename "table2panelB"
	if "`R'" == "1to5" local tablename "table2panelA"
	estimates clear
	forvalues s = 1/7 {
		qui eststo: qui reg infield_R`R'_`s' info infield_R`R'_0				///
				i.sex ib7.race cum_gpa_0 honors_0 student_debt parent_college	///
				 i.major_rank_1, r
	}

	// Preview in the log.
	esttab , replace				///
			se keep(info infield_R`R'_0) 			///
			star(* .1 ** .05 *** .01)	label										///
			order(info infield_R`R'_0) nomtitles nonum prehead (" ") 		///
			postfoot(" ") substitute(_ _) posthead("")									///
			indicate("Controls = honors_0") nogaps stats(N)

	// File output.  An earlier, commented-out variant wrote the same table to
	// the .tex file of the same name.
	esttab using `tablename'.csv, replace				///
			se keep(info infield_R`R'_0) 			///
			star(* .1 ** .05 *** .01)	label										///
			order(info infield_R`R'_0) nomtitles nonum prehead (" ") 		///
			postfoot(" ") substitute(_ _) posthead("")									///
			indicate("Controls = honors_0") nogaps stats(N, fmt(%3.0f))	b(2) se(2)
}

restore
}


*** TABLE 3
{
// "Semester 8" outcome: total classes over semesters 1-7.  capture drop lets
// this section be re-run in the same session without a "variable already
// defined" error.
foreach R in 1 1to5 {
	capture drop classes_R`R'_8
	egen classes_R`R'_8 = rowtotal(classes_R`R'_1 classes_R`R'_2 classes_R`R'_3 	///
					classes_R`R'_4 classes_R`R'_5 classes_R`R'_6 classes_R`R'_7)
}

estimates clear
label var info "Info"
label var classes_R1_0 "\# Rank 1 Classes \hspace{3ex} in Fall 2014"
label var classes_R1to5_0 "\# Rank 1-5 Classes \hspace{3ex} in Fall 2014"

// Panel A: classes in rank 1-5 fields; panel B: classes in the rank-1 field.
// Columns 1-7 use only students with a non-missing major group in that
// semester; column 8 (the total) uses everyone.  The sample restriction is an
// if-qualifier, which estimates the same model as dropping the observations
// inside preserve/restore.
// order() now names the panel's own baseline variable; it used to list
// infield_R1_0, which does not appear in these models.
foreach R in 1 1to5 {
	if "`R'" == "1" local tablename "table3panelB"
	if "`R'" == "1to5" local tablename "table3panelA"
	estimates clear
	forvalues s = 1/8 {
		local sample
		if `s' != 8 {
			local sample "if !missing(major_group_`s')"
		}
		qui eststo: qui reg classes_R`R'_`s' info classes_R`R'_0				///
				i.sex ib7.race cum_gpa_0 honors_0 student_debt parent_college	///
				 i.major_rank_1 `sample', r
	}

	// Preview in the log; note it shows p-values where the file shows s.e.
	esttab , replace				///
			p keep(info classes_R`R'_0) 			///
			star(* .1 ** .05 *** .01)	label										///
			order(info classes_R`R'_0) nomtitles nonum prehead (" ") 		///
			postfoot(" ") substitute(_ _) posthead("")									///
			indicate("Controls = honors_0") nogaps stats(N) wrap varwidth(20)

	// File output.  An earlier, commented-out variant wrote the same table to
	// the .tex file of the same name.
	esttab using `tablename'.csv, replace				///
			se keep(info classes_R`R'_0) 			///
			star(* .1 ** .05 *** .01)	label										///
			order(info classes_R`R'_0) nomtitles nonum prehead (" ") 		///
			postfoot(" ") substitute(_ _) posthead("")									///
			indicate("Controls = honors_0") nogaps stats(N, fmt(%3.0f)) wrap varwidth(20)	///
			b(2) se(2)
}
}

*** TABLE 4 
{
estimates clear

// One column per field characteristic: the semester-7 indicator regressed on
// treatment (info), the semester-0 indicator and the usual controls, with
// robust standard errors.
foreach y in lucrative_ba lucrative_grad STEM difficult {
	eststo: reg `y'_7  info  `y'_0 											///
				i.sex ib7.race cum_gpa_0 honors_0 student_debt parent_college	///
				 i.major_rank_1, r
	estadd local include_demos "Yes"
}

// Row labels for the baseline indicators.
label var STEM_0 "Began in STEM Field"
label var lucrative_ba_0 "Began in Lucrative (BA) Field"
label var lucrative_grad_0 "Began in Lucrative (PGD) Field"
label var difficult_0 "Began in Difficult Field"

// Preview in the log.
esttab, 				///
	se keep(info lucrative_ba_0 lucrative_grad_0 STEM_0 difficult_0) 			///
	star(* .1 ** .05 *** .01)	label										///
	nomtitles nonum prehead (" ") 		///
	postfoot(" ") substitute(_ _) posthead("")								///
	stats(include_demos N, label("Controls" "N") fmt(%3.0f) )				///
	nogaps b(2) se(2)

// File output.  An earlier, commented-out variant wrote the same table to
// table4.tex.
esttab using table4.csv, replace				///
	se keep(info lucrative_ba_0 lucrative_grad_0 STEM_0 difficult_0) 			///
	star(* .1 ** .05 *** .01)	label										///
	nomtitles nonum prehead (" ") 		///
	postfoot(" ") substitute(_ _) posthead("")								///
	stats(include_demos N, label("Controls" "N") fmt(%3.0f) )				///
	nogaps b(2) se(2)
}

*** TABLE 5 
{
		// Controls shared by every column.  The extra_controls local is not
		// defined in this section and expands to nothing unless set earlier.
		local demographics infield_R1_0 i.sex ib7.race cum_gpa_0 honors_0 parent_college student_debt i.major_rank_1  `extra_controls'
		estimates clear

		// Columns with an over-/under-estimate split of the BA error.
		foreach err_var in error_R1 DinD {
			preserve
			cap drop over_X_info
			cap drop under_X_info
			cap drop over
			// Stata treats missing as larger than any number, so a bare
			// "error > 0" would classify respondents with a missing error as
			// overestimators and keep them in the regression.  They are left
			// missing here, which matches the sample of the continuous-error
			// columns below.
			gen over = (`err_var'_ba > 0) if !missing(`err_var'_ba)
			gen over_X_info = over*info
			gen under_X_info = (1 - over)*info
			local extra_var ""
			if inlist("`err_var'", "DinD") local extra_var i.major_rank_2
			eststo reg_`err_var'_over:  reg infield_R1_7  over_X_info ///
						under_X_info  over 	`extra_var'					///
				`demographics', r	level(95)
			estadd local include_demos "Yes": reg_`err_var'_over
			restore
		}

		// Columns with the continuous error interacted with treatment status.
		foreach err_var in error_R1 log_error_R1 DinD log_DinD {
			preserve
			cap drop error
			cap drop error_X_info
			cap drop error_X_control
			qui gen error = `err_var'_ba
			qui gen error_X_info = error*info
			qui gen error_X_control = error*(info==0)
			local extra_var ""
			if inlist("`err_var'", "DinD", "log_DinD") local extra_var i.major_rank_2
			eststo reg_`err_var': reg infield_R1_7  error_X_info ///
						 error_X_control  info 	`extra_var'							///
				`demographics'  , r
			// Test of equal error slopes in the two arms, stored as p_diff.
			test error_X_info = error_X_control
			estadd local p_diff "`r(p)'": reg_`err_var'
			estadd local include_demos "Yes": reg_`err_var'
			restore
		}

		// The regressors above were created inside preserve/restore and no
		// longer exist.  Recreate them as empty placeholders so esttab can
		// pick up their variable labels for the row names.
		preserve
		local DinD_X_info_lab "Relative Error \$\times\$ Info"
		local DinD_X_control_lab "Relative Error \$\times\$ Control"
		local over_X_info_lab "Overestimate  \$\times\$ Info"
		local under_X_info_lab "Underestimate \$\times\$ Info"
		local over_lab "Overestimate"
		local error_X_info_lab "Error \$\times\$ Info"
		local error_X_control_lab "Error \$\times\$ Control"
		foreach var in error_X_info error_X_control  ///
					  over_X_info ///
						under_X_info  over  {
			cap drop `var'
			gen `var' = .
			label var `var' "``var'_lab'"
		}
		label var infield_R1_0 "Began in Rank 1 Field"
		label var info "Info"

		// File output.  An earlier, commented-out variant wrote the same table
		// to table5.tex.
		esttab reg_error_R1_over  reg_error_R1 reg_log_error_R1 			///
				reg_DinD_over  reg_DinD reg_log_DinD 						///
				using table5.csv, replace				///
			se keep(over_X_info under_X_info over error_X_info error_X_control info infield_R1_0) 			///
			star(* .1 ** .05 *** .01)	label										///
			order(over_X_info under_X_info over error_X_info error_X_control info infield_R1_0 ) ///
			nomtitles nonum prehead (" ") 		///
			postfoot(" ") substitute(_ _) posthead("")	stats(include_demos N, label("Controls" "N") fmt(%3.0f) )								///
			nogaps b(2) se(2)
		restore
}

*** TABLES B1-B3
{
* Regressions of guess and absolute error

preserve

// Reshape so that every (respondent, rank) combination is one observation.
keep id actual_* guess_ba_rank* guess_grad_rank* major_rank_*	///
		sex race cum_gpa* parent_college student_debt `extra_controls'

reshape long actual_ba_rank_ actual_grad_rank_ guess_ba_rank_ 	///
		guess_grad_rank_ major_rank_, i(id) j(rank)

rename 	(actual_ba_rank_ guess_ba_rank_ guess_grad_rank_ actual_grad_rank_ major_rank_)	///
		(actual_ba       guess_ba       guess_grad       actual_grad       major)

// Signed and absolute guessing error per observation.
foreach lev in ba grad {
	gen error_`lev' = guess_`lev' - actual_`lev'
}
foreach lev in ba grad {
	gen abs_error_`lev' = abs(error_`lev')
}

label variable rank "Rank"
label define ranks 1 "Rank 1" 2 "Rank 2" 3 "Rank 3" 4 "Rank 4" 5 "Rank 5"
label values rank ranks
label variable actual_ba "Actual BA"
label variable actual_grad "Actual PGD"

// Control sets: with race (reported columns) and without race (used only for
// the R-squared figures displayed in the log).
local b12_with_race i.sex ib7.race cum_gpa_0 student_debt parent_college
local b12_no_race   i.sex cum_gpa_0 student_debt parent_college

// Table B1: guess on actual salary, s.e. clustered by respondent.
eststo col1: qui reg guess_ba actual_ba `b12_with_race' i.rank `extra_controls', vce(cluster id)
eststo col2: qui reg guess_grad actual_grad `b12_with_race' i.rank `extra_controls', vce(cluster id)

// An earlier, commented-out variant wrote the same table to tableB1.tex.
esttab col* using tableB1.csv, 							///
	star(* .1 ** .05 *** .01) se label replace nobaselevels r2 ar2	nogaps	///
	mtitles("BA" "PGD") order(actual_ba actual_grad)						///
	nonotes drop() compress													///
	postfoot("") 															///
	prehead("")

// R-squared and adjusted R-squared of the same models without race.
foreach lev in ba grad {
	qui reg guess_`lev' actual_`lev' `b12_no_race' i.rank `extra_controls', vce(cluster id)
	display e(r2)
	display e(r2_a)
}

eststo clear

// Table B2: absolute error on demographics with rank and major fixed effects.
eststo col1: qui reg abs_error_ba `b12_with_race' i.rank i.major `extra_controls', vce(cluster id)
eststo col2: qui reg abs_error_grad `b12_with_race' i.rank i.major `extra_controls', vce(cluster id)

// An earlier, commented-out variant wrote the same table to tableB2.tex.
esttab using tableB2.csv, 							///
	se label replace nobaselevels r2 ar2	compress star(* .1 ** .05 *** .01)	///
	mtitles("BA" "PGD") nogaps													///
	nonotes drop(*.major)														///
	postfoot("") 																///
	prehead("")

// R-squared and adjusted R-squared of the same models without race.
foreach lev in ba grad {
	qui reg abs_error_`lev' `b12_no_race' i.rank i.major `extra_controls', vce(cluster id)
	display e(r2)
	display e(r2_a)
}
restore

* Regressions of absolute error in difference between fields.
preserve

// Reshape so that each respondent's guess about each pair of majors is its
// own observation.
keep id sex race cum_gpa_0 student_debt parent_college guess_* actual_* `extra_controls' major_rank_1 major_rank_2
gen x = _n

// First pass: the second major of the pair becomes the long index major2.
// Stub order per first major: guess_ba, actual_ba, guess_grad, actual_grad.
local stubs
forvalues a = 1/10 {
	foreach stem in guess_ba actual_ba guess_grad actual_grad {
		local stubs `stubs' `stem'_diff_`a'_
	}
}
reshape long `stubs' , i(x major_rank_1 major_rank_2) j(major2)
rename (*_) (*)

// Second pass: the first major of the pair becomes the long index major1.
drop x
gen x = _n
reshape long guess_ba_diff_ actual_ba_diff_ guess_grad_diff_ actual_grad_diff_ , 	///
		i(x major_rank_1 major_rank_2) j(major1)
rename (*_) (*)

// Drop combinations with no guessed BA difference.
drop if guess_ba_diff == .

// Error in the guessed difference, signed and absolute.
foreach lev in ba grad {
	gen err_`lev'_diff = guess_`lev'_diff - actual_`lev'_diff
}
foreach lev in ba grad {
	gen abs_err_`lev'_diff = abs(err_`lev'_diff)
}

sum abs_err_ba_diff
sum abs_err_grad_diff

gen abs_act_ba_diff = abs(actual_ba_diff)

// Pair identifier for fixed effects: the two major codes concatenated as
// strings and then encoded.
tostring major1, generate(major1_s)
tostring major2, generate(major2_s)
gen pair = major1_s + major2_s
encode pair, generate(new_pair)
drop pair
rename new_pair pair

// Control sets and the restriction to the pair formed by the respondent's
// rank-1 and rank-2 majors.
local b3_with_race i.sex ib7.race cum_gpa_0 student_debt parent_college
local b3_no_race   i.sex cum_gpa_0 student_debt parent_college
local b3_top2 "inlist(major1, major_rank_1, major_rank_2) & inlist(major2, major_rank_1, major_rank_2)"

// Regressions of the absolute error on demographics, s.e. clustered by id.
// col1/col2 use all pairs, col3/col4 only the rank-1 / rank-2 pair.
eststo col1: qui reg abs_err_ba_diff `b3_with_race' i.pair `extra_controls', vce(cluster id)
eststo col2: qui reg abs_err_grad_diff `b3_with_race' i.pair `extra_controls', vce(cluster id)

eststo col3: qui reg abs_err_ba_diff `b3_with_race' i.pair `extra_controls' if `b3_top2', vce(cluster id)
eststo col4: qui reg abs_err_grad_diff `b3_with_race' i.pair `extra_controls' if `b3_top2', vce(cluster id)

// An earlier, commented-out variant wrote the same table to tableB3.tex.
esttab col3 col4 col1 col2  using tableB3.csv, 							///
	star(* .1 ** .05 *** .01) se label replace nobaselevels	r2 ar2		///
	nogaps																///
	nonotes drop(*.pair)												///
	nomtitles nonum prehead (" ") 										///
	postfoot(" ") substitute(_ _) posthead("")

eststo clear

// R-squared and adjusted R-squared of the all-pairs models without race.
foreach lev in ba grad {
	qui reg abs_err_`lev'_diff `b3_no_race' i.pair `extra_controls', vce(cluster id)
	display e(r2)
	display e(r2_a)
}
restore


}


